# This script reproduces Figure 8 #
rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")

# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

###
## main ols regression t+1, with and without lags
model_area_con <- plm(lead(log_transaction_area,1) ~ 
                        tour,
                      index = c("county_id", "month"),
                      effect = "twoways", model = "within",
                      data = d_sub1)
vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = d_sub1$county_id)
se_area_con <- coeftest(model_area_con, vcov_area_con)

lower_area_con_95 <- se_area_con["tour", "Estimate"] - qnorm(.975) * se_area_con["tour", "Std. Error"]
upper_area_con_95 <- se_area_con["tour", "Estimate"] + qnorm(.975) * se_area_con["tour", "Std. Error"]

lower_area_con_90 <- se_area_con["tour", "Estimate"] - qnorm(.95) * se_area_con["tour", "Std. Error"]
upper_area_con_90 <- se_area_con["tour", "Estimate"] + qnorm(.95) * se_area_con["tour", "Std. Error"]
est_nolag <- se_area_con["tour", "Estimate"]

##
model_area_con_lag <- plm(lead(log_transaction_area,1) ~ 
                            tour + 
                            log_transaction_area_l1 +
                            log_transaction_area_l2 +
                            log_transaction_area_l3,
                            # agp_treat_l1 + 
                            # agp_treat_l1_missing + 
                           
                          index = c("county_id", "month"),
                          effect = "twoways", model = "within",
                          
                          #  as.factor(county_id) + as.factor(time),
                          #    as.factor(province_id) * as.factor(year),
                          data = d_sub1)
vcov_area_con_lag <- clubSandwich::vcovCR(model_area_con_lag, type="CR1S", cluster = d_sub1$county_id)
# coeftest(model_area_con, vcov_area_con)
se_area_con_lag <- coeftest(model_area_con_lag, vcov_area_con_lag)
# se_area_con_f1["tour", "Estimate"]
# se_area_con_f1["tour", "Std. Error"]

lower_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.975) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_95_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.975) * se_area_con_lag["tour", "Std. Error"]

lower_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] - qnorm(.95) * se_area_con_lag["tour", "Std. Error"]
upper_area_con_90_lag <- se_area_con_lag["tour", "Estimate"] + qnorm(.95) * se_area_con_lag["tour", "Std. Error"]

est_lag <- se_area_con_lag["tour", "Estimate"]

pdf("output/Figure8.pdf")
plot(x = 0, xlab = "", ylab = "Effect Estimates", 
     pch = "",
     ylim = c(-.28, .01),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Effects of Inspection \n on Land Auctions Next Month")
lines(c(1,1), c(lower_area_con_95, upper_area_con_95))
lines(c(1,1), c(lower_area_con_90, upper_area_con_90), 
      lwd = 5, col = "grey")
points(1, est_nolag, pch = 19)
mtext(side = 1, "Not controlling for \n lagged outcomes", at = 1, 
      outer = F,
      line = 1.2)

lines(c(1.1,1.1), c(lower_area_con_95_lag, upper_area_con_95_lag))
lines(c(1.1,1.1), c(lower_area_con_90_lag, upper_area_con_90_lag), 
      lwd = 5, col = "grey")
points(1.1, est_lag, pch = 19)
mtext(side = 1, "Controlling for \n lagged outcomes", at = 1.1, 
      outer = F,
      line = 1.2)
#mtext(1.1, -.07, "Controlling for \n lagged outcomes")

abline(h = 0, lty = 3)
dev.off()